In [1]:
import pandas as pd
import plotly.express as px
import numpy as np
In [2]:
# Load the raw insurance records (path is relative to the working directory)
# and preview the first five rows as the cell output.
df = pd.read_csv('insurance.csv')
df.head(n=5)
Out[2]:
age sex bmi children smoker region charges
0 19 female 27.900 0 yes southwest 16884.92400
1 18 male 33.770 1 no southeast 1725.55230
2 28 male 33.000 3 no southeast 4449.46200
3 33 male 22.705 0 no northwest 21984.47061
4 32 male 28.880 0 no northwest 3866.85520
In [3]:
# Number of rows per region, most frequent first.
df['region'].value_counts()
Out[3]:
southeast    364
southwest    325
northwest    325
northeast    324
Name: region, dtype: int64
In [4]:
# Size of the loaded frame as a (rows, columns) tuple.
df.shape
Out[4]:
(1338, 7)
In [5]:
# Load the table holding ActualValue / PredictedValue pairs (path is relative
# to the working directory) and preview the first five rows.
results = pd.read_csv('results.csv')
results.head(n=5)
Out[5]:
Age Sex Bmi Children Smoker Region ActualValue PredictedValue
0 32.0 1.0 28.88 0.0 1.0 1.0 3866.8552 5725.318890
1 25.0 1.0 26.22 0.0 1.0 0.5 2721.3208 2807.816971
2 23.0 1.0 34.40 0.0 1.0 -0.5 1826.8430 4664.712803
3 56.0 -1.0 39.82 0.0 1.0 -1.0 11090.7178 15065.692717
4 19.0 1.0 24.60 1.0 1.0 -0.5 1837.2370 741.034551
In [6]:
# Predicted-vs-actual scatter: points lying on the y=x diagonal are exact
# predictions; vertical distance from it is the prediction error.
fig = px.scatter(
    results,
    x='ActualValue',
    y='PredictedValue',
    title='Predicted vs. actual values',
    labels={'ActualValue': 'Actual value', 'PredictedValue': 'Predicted value'},
)
# Reference diagonal spanning the observed range of actual values.
x = np.linspace(results.ActualValue.min(), results.ActualValue.max(), num=1000)
# Set the mode explicitly: plotly's default for a scatter trace depends on the
# number of points (markers are added below 20 points), so the diagonal's
# appearance should not hinge on the value of `num`.
fig.add_scatter(x=x, y=x, mode='lines', name='y=x')
fig.show()